netmhc_all = readRDS("../data/neoAg_netMHCpan4_1.rds")
colnames(netmhc_all)
## [1] "Pos" "ID" "HLA" "Peptide_mut" "EL_score_mut"
## [6] "EL_rank_mut" "Peptide_ref" "EL_score_ref" "EL_rank_ref" "sample"
dim(netmhc_all)
## [1] 1617468 10
head(netmhc_all)
netmhc_all = as.data.table(netmhc_all)
netmhc_mut = netmhc_all[,.(EL_score_mut_max = max(EL_score_mut)),
by = .(ID, sample)]
dim(netmhc_mut)
## [1] 31294 3
netmhc_mut
netmhc_mut$key = gsub("_", ":", netmhc_mut$ID)
netmhc_mut$sample = gsub("_hlai", "", netmhc_mut$sample)
g1 = ggplot(netmhc_mut, aes(x=EL_score_mut_max)) + xlab("NetMHC score (mut)") +
geom_histogram(color="darkblue", fill="lightblue")
g1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
netmhc_ref = netmhc_all[,.(EL_score_ref_max = max(EL_score_ref)),
by = .(ID, sample)]
dim(netmhc_ref)
## [1] 31294 3
netmhc_ref
netmhc_ref$key = gsub("_", ":", netmhc_ref$ID)
netmhc_ref$sample = gsub("_hlai", "", netmhc_ref$sample)
g1 = ggplot(netmhc_ref, aes(x=EL_score_ref_max)) + xlab("NetMHC score (ref)") +
geom_histogram(color="darkblue", fill="lightblue")
g1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
table(netmhc_mut$ID == netmhc_ref$ID)
##
## TRUE
## 31294
table(netmhc_mut$sample == netmhc_ref$sample)
##
## TRUE
## 31294
netmhc = merge(netmhc_mut, netmhc_ref)
netmhc
gg1 = ggplot(data = netmhc, mapping = aes(x = EL_score_ref_max,
y = EL_score_mut_max)) +
geom_pointdensity(size=1) + xlab("NetMHCpan-4.1 ref") +
ylab("NetMHCpan-4.1 mut") +
scale_color_viridis() + theme(legend.position = "top")
pdf("step4_nb_analysis_files/netmhc_ref_vs_mut.pdf", width=3.5, height=4.2)
gg1
dev.off()
## quartz_off_screen
## 2
ppmint_mut = fread("../output_PEPPRMINT/PEPPRMINT_Riaz_2017_aggregate.tsv")
dim(ppmint_mut)
## [1] 31298 3
ppmint_mut
summary(ppmint_mut$pepprmint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000057 0.2962712 0.5442800 0.5168409 0.7257650 0.9994867
setequal(ppmint_mut$sample, netmhc$sample)
## [1] TRUE
length(setdiff(ppmint_mut$key, netmhc$key))
## [1] 1136
g1 = ggplot(ppmint_mut, aes(x=pepprmint)) + xlab("PEPPRMINT score (mut)") +
geom_histogram(color="darkblue", fill="lightblue")
g1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppmint_ref = fread("../output_PEPPRMINT/PEPPRMINT_Riaz_2017_ref_aggregate.tsv")
dim(ppmint_ref)
## [1] 31298 3
ppmint_ref
summary(ppmint_ref$pepprmint)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000077 0.2605880 0.4955733 0.4915124 0.7253583 0.9992200
table(ppmint_mut$key == ppmint_ref$key)
##
## TRUE
## 31298
table(ppmint_mut$sample == ppmint_ref$sample)
##
## TRUE
## 31298
g1 = ggplot(ppmint_ref, aes(x=pepprmint)) + xlab("PEPPRMINT score (ref)") +
geom_histogram(color="darkblue", fill="lightblue")
g1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppmint = merge(ppmint_mut, ppmint_ref, by = c("key", "sample"),
suffixes = c("_mut", "_ref"))
ppmint
gg2 = ggplot(data = ppmint, mapping = aes(x = pepprmint_ref, y = pepprmint_mut)) +
geom_pointdensity(size=1) + xlab("PEPPRMINT ref") + ylab("PEPPRMINT mut") +
scale_color_viridis() + theme(legend.position = "top")
pdf("step4_nb_analysis_files/pepprmint_ref_vs_mut.pdf", width=3.5, height=4.2)
gg2
dev.off()
## quartz_off_screen
## 2
sample_mb = fread(file = "../data/riaz_patient_mb_info.txt")
dim(sample_mb)
## [1] 104 3
sample_mb[1:2,]
ppmint$Patient = sub("_.*", "", ppmint$sample)
netmhc$Patient = sub("_.*", "", netmhc$sample)
sample_mb$Patient = sub("_.*", "", sample_mb$sample)
clinic = read_excel("../data/_supp/mmc2.xlsx", skip=1)
dim(clinic)
## [1] 73 12
clinic[1:2,]
clinic = as.data.frame(clinic)
table(clinic$Patient %in% sample_mb$Patient)
##
## FALSE TRUE
## 8 65
table(tolower(clinic$Patient) %in% tolower(sample_mb$Patient))
##
## FALSE TRUE
## 8 65
patient = intersect(clinic$Patient, sample_mb$Patient)
length(patient)
## [1] 65
patient = intersect(patient, ppmint$Patient)
length(patient)
## [1] 60
clinic[which(! clinic$Patient %in% patient),]
clinic = clinic[match(patient, clinic$Patient),]
dim(clinic)
## [1] 60 12
head(clinic)
names(clinic)[c(4:5,7)] = c("Dead", "Time", "Mutation")
head(clinic)
table(clinic$Response)
##
## CR NE PD PR SD
## 3 3 23 10 21
clinic[["Response3"]] = clinic$Response
clinic$Response3[which(clinic$Response == "NE")] = "PD"
clinic$Response3[which(clinic$Response == "CR")] = "PRCR"
clinic$Response3[which(clinic$Response == "PR")] = "PRCR"
clinic$Response = clinic$Response3
clinic$Response[which(clinic$Response3 == "SD")] = "PDSD"
clinic$Response[which(clinic$Response3 == "PD")] = "PDSD"
table(clinic$Response, clinic$Response3)
##
## PD PRCR SD
## PDSD 26 0 21
## PRCR 0 13 0
clinic$Cohort[which(clinic$Cohort == "NIV3-NAIVE")] = "Ipi naive"
clinic$Cohort[which(clinic$Cohort == "NIV3-PROG")] = "Ipi prog"
mat_pre = match(paste0(patient, "_pre"), sample_mb$sample)
clinic[["mb_pre"]] = log10(sample_mb$n_mutations_neoAg[mat_pre] + 1)
mat_on = match(paste0(patient, "_on"), sample_mb$sample)
clinic[["mb_on"]] = log10(sample_mb$n_mutations_neoAg[mat_on] + 1)
clinic$mut_diff = clinic$mb_pre - clinic$mb_on
anova(lm(mut_diff ~ Response, data = clinic))
anova(lm(mb_pre ~ Response, data = clinic))
p0 = ggplot(clinic, aes(x=Response, y=mb_pre)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(position=position_jitter(0.2), size=1) +
labs(title="", x="Response", y = "log10(MB)")
p0
p0 = ggplot(clinic, aes(x=Response, y=mb_pre - mb_on)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(position=position_jitter(0.2), size=1) +
labs(title="", x="Response", y = "log10(MB) decrease")
p0
## Warning: Removed 23 rows containing non-finite values (stat_boxplot).
## Warning: Removed 23 rows containing missing values (geom_point).
res_cox = coxph(Surv(Time, Dead) ~ mb_pre, data = clinic)
res_cox
## Call:
## coxph(formula = Surv(Time, Dead) ~ mb_pre, data = clinic)
##
## coef exp(coef) se(coef) z p
## mb_pre 0.04681 1.04793 0.21031 0.223 0.824
##
## Likelihood ratio test=0.05 on 1 df, p=0.8232
## n= 60, number of events= 35
cutoffs = seq(0.75, 0.95, by=0.05)
pvals = matrix(NA, nrow=length(cutoffs), ncol=2)
netmhc$sample = as.factor(netmhc$sample)
ppmint$sample = as.factor(ppmint$sample)
clinic$`Cytolytic Score` = as.numeric(clinic$`Cytolytic Score`)
## Warning: NAs introduced by coercion
summary(clinic$cytolytic_score)
## Length Class Mode
## 0 NULL NULL
g1 = ggplot(clinic, aes(x=log10(`Cytolytic Score`))) +
xlab("log10(Cytolytic Score)") +
geom_histogram(color="darkblue", fill="lightblue")
g1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 18 rows containing non-finite values (stat_bin).
clinic[["cytolytic_score"]] = log10(clinic$`Cytolytic Score`)
for(i in 1:length(cutoffs)){
ci = cutoffs[i]
tti = table(netmhc$sample[netmhc$EL_score_mut_max >= ci])
mat_pre = match(paste0(patient, "_pre"), names(tti))
nb_netmhc_pre = log10(tti[mat_pre] + 1)
pvals[i,1] = anova(lm(clinic$cytolytic_score ~ nb_netmhc_pre))$`Pr(>F)`[1]
tti = table(ppmint$sample[ppmint$pepprmint_mut >= ci])
mat_pre = match(paste0(patient, "_pre"), names(tti))
nb_ppmint_pre = log10(tti[mat_pre] + 1)
pvals[i,2] = anova(lm(clinic$cytolytic_score ~ nb_ppmint_pre))$`Pr(>F)`[1]
}
pvals
## [,1] [,2]
## [1,] 0.1785801 0.08309863
## [2,] 0.1951604 0.04960438
## [3,] 0.2340431 0.03979126
## [4,] 0.2656822 0.03564467
## [5,] 0.1552959 0.07081369
pval_df = data.frame(pval = c(pvals),
cutoff = rep(as.character(cutoffs), times=2),
method = rep(c("NetMHCpan-4.1", "PEPPRMINT"), each=5))
pm = ggplot(pval_df, aes(x = method, y = -log10(pval), fill = cutoff)) +
geom_bar( stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Paired") + theme(legend.position = "top") +
geom_hline(yintercept = -log10(0.05), lty=2) +
guides(fill=guide_legend(nrow=2, byrow=TRUE))
pdf("step4_nb_analysis_files/nb_vs_cytolytic_pvals.pdf", width=2.8, height=4)
print(pm)
dev.off()
## quartz_off_screen
## 2
anova(lm(clinic$cytolytic_score ~ clinic$mb_pre))
Here we use 0.9 as cutoff of neoantigen score.
ci = 0.9
tti = table(netmhc$sample[netmhc$EL_score_mut_max >= ci])
mat_pre = match(paste0(patient, "_pre"), names(tti))
nb_netmhc_pre = log10(tti[mat_pre] + 1)
mat_on = match(paste0(patient, "_on"), names(tti))
nb_netmhc_on = log10(tti[mat_on] + 1)
tti = table(ppmint$sample[ppmint$pepprmint_mut >= ci])
mat_pre = match(paste0(patient, "_pre"), names(tti))
nb_ppmint_pre = log10(tti[mat_pre] + 1)
mat_on = match(paste0(patient, "_on"), names(tti))
nb_ppmint_on = log10(tti[mat_on] + 1)
clinic[["nb_netmhc_pre"]] = nb_netmhc_pre
clinic[["nb_ppmint_pre"]] = nb_ppmint_pre
clinic2 = clinic[which(!is.na(clinic$`Cytolytic Score`)),]
lm1 = lm(cytolytic_score ~ mb_pre, data=clinic2)
lm1
##
## Call:
## lm(formula = cytolytic_score ~ mb_pre, data = clinic2)
##
## Coefficients:
## (Intercept) mb_pre
## 2.049 0.157
pval = anova(lm1)$`Pr(>F)`[1]
g1 = ggplot(clinic2, aes(x=mb_pre, y=cytolytic_score)) +
geom_point() + xlab("mutation burden\n") + geom_smooth(method=lm) +
ggtitle(sprintf("pvalue = %.3f", pval))
pval = anova(lm(cytolytic_score ~ nb_netmhc_pre, data=clinic2))$`Pr(>F)`[1]
g2 = ggplot(clinic2, aes(x=nb_netmhc_pre, y=cytolytic_score)) +
geom_point() + xlab("neoantigen burden \n(NetMHCpan 4.1)") +
geom_smooth(method=lm) +
ggtitle(sprintf("pvalue = %.3f", pval))
pval = anova(lm(cytolytic_score ~ nb_ppmint_pre, data=clinic2))$`Pr(>F)`[1]
g3 = ggplot(clinic2, aes(x=nb_ppmint_pre, y=cytolytic_score)) +
geom_point() + xlab("neoantigen burden \n(PEPPRMINT)") +
geom_smooth(method=lm) +
ggtitle(sprintf("pvalue = %.3f", pval))
gg1 = ggarrange(g1, g2, g3, ncol = 3,
common.legend = TRUE, legend = "bottom")
## `geom_smooth()` using formula 'y ~ x'
## Don't know how to automatically pick scale for object of type table. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## Don't know how to automatically pick scale for object of type table. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Don't know how to automatically pick scale for object of type table. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
## Don't know how to automatically pick scale for object of type table. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'
gg1
pdf("step4_nb_analysis_files/nb_vs_cytolytic.pdf", width=8, height=3)
print(gg1)
dev.off()
## quartz_off_screen
## 2
ci = 0.9
tti = table(netmhc$sample[netmhc$EL_score_mut_max >= ci])
mat_pre = match(paste0(patient, "_pre"), names(tti))
nb_netmhc_pre = log10(as.numeric(tti[mat_pre]) + 1)
mat_on = match(paste0(patient, "_on"), names(tti))
nb_netmhc_on = log10(as.numeric(tti[mat_on]) + 1)
tti = table(ppmint$sample[ppmint$pepprmint_mut >= ci])
mat_pre = match(paste0(patient, "_pre"), names(tti))
nb_ppmint_pre = log10(as.numeric(tti[mat_pre]) + 1)
mat_on = match(paste0(patient, "_on"), names(tti))
nb_ppmint_on = log10(as.numeric(tti[mat_on]) + 1)
clinic[["nb_netmhc_pre"]] = nb_netmhc_pre
clinic[["nb_ppmint_pre"]] = nb_ppmint_pre
clinic[["nb_netmhc_on"]] = nb_netmhc_on
clinic[["nb_ppmint_on"]] = nb_ppmint_on
clinic$mut_diff = clinic$mb_pre - clinic$mb_on
anova(lm(mut_diff ~ Response, data = clinic))
clinic$nb_netmhc_diff = clinic$nb_netmhc_pre - clinic$nb_netmhc_on
anova(lm(nb_netmhc_diff ~ Response, data = clinic))
clinic$nb_ppmint_diff = clinic$nb_ppmint_pre - clinic$nb_ppmint_on
anova(lm(nb_ppmint_diff ~ Response, data = clinic))
table(clinic$Response, is.na(clinic$mut_diff))
##
## FALSE TRUE
## PDSD 29 18
## PRCR 8 5
table(clinic$Response, is.na(clinic$nb_netmhc_diff))
##
## FALSE TRUE
## PDSD 27 20
## PRCR 3 10
table(clinic$Response, is.na(clinic$nb_ppmint_diff))
##
## FALSE TRUE
## PDSD 27 20
## PRCR 3 10
clinic[clinic$Response == "PRCR" & !is.na(clinic$nb_ppmint_diff),]
clinic$Response = factor(clinic$Response, levels = c("PDSD", "PRCR"))
g0 = ggplot(clinic, aes(x = nb_netmhc_pre, y = nb_netmhc_on, color=Response)) +
geom_jitter(width = 0.02, height=0.02, size = 1) +
xlab("pre-therapy") + ylab("on-therapy") +
ggtitle("NetMHCpan-4.1 \n neoantigen burden") + xlim(-0.02, 2.2)
g1 = ggplot(clinic, aes(x = nb_ppmint_pre, y = nb_ppmint_on, color=Response)) +
geom_jitter(width = 0.02, height=0.02, size = 1) +
xlab("pre-therapy") + ylab("on-therapy") +
ggtitle("PEPPRMINT \n neoantigen burden") + xlim(-0.02, 2.2)
gg2 = ggarrange(g0, g1, ncol = 2, common.legend = TRUE, legend = "bottom",
labels = c("(A)", "(B)"))
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
gg2
pdf("step4_nb_analysis_files/compare_nb_pre_on.pdf", width=5.6, height=3)
print(gg2)
dev.off()
## quartz_off_screen
## 2
cols = c("sample", "key", "EL_score_mut_max", "EL_score_ref_max", "Patient")
netmhc = netmhc[, ..cols]
median(netmhc$EL_score_ref_max)
## [1] 0.2206
table(netmhc$EL_score_mut_max > 0.8 & netmhc$EL_score_ref_max < 0.22)
##
## FALSE TRUE
## 30981 313
table(netmhc$EL_score_mut_max > 0.8 & netmhc$EL_score_ref_max < 0.2)
##
## FALSE TRUE
## 31006 288
table(netmhc$EL_score_ref_max > 0.8 & netmhc$EL_score_mut_max < 0.2)
##
## FALSE TRUE
## 31115 179
table(netmhc$EL_score_mut_max > 0.9 & netmhc$EL_score_ref_max < 0.22)
##
## FALSE TRUE
## 31153 141
table(netmhc$EL_score_mut_max > 0.9 & netmhc$EL_score_ref_max < 0.1)
##
## FALSE TRUE
## 31223 71
table(netmhc$EL_score_ref_max > 0.9 & netmhc$EL_score_mut_max < 0.1)
##
## FALSE TRUE
## 31251 43
cols = c("sample", "key", "pepprmint_mut", "pepprmint_ref", "Patient")
ppmint = ppmint[, ..cols]
median(ppmint$pepprmint_ref)
## [1] 0.4955733
table(ppmint$pepprmint_mut > 0.8 & ppmint$pepprmint_ref < 0.5)
##
## FALSE TRUE
## 30830 468
table(ppmint$pepprmint_mut > 0.8 & ppmint$pepprmint_ref < 0.2)
##
## FALSE TRUE
## 31206 92
table(ppmint$pepprmint_ref > 0.8 & ppmint$pepprmint_mut < 0.2)
##
## FALSE TRUE
## 31271 27
table(ppmint$pepprmint_mut > 0.9 & ppmint$pepprmint_ref < 0.5)
##
## FALSE TRUE
## 31150 148
table(ppmint$pepprmint_mut > 0.9 & ppmint$pepprmint_ref < 0.1)
##
## FALSE TRUE
## 31290 8
table(ppmint$pepprmint_ref > 0.9 & ppmint$pepprmint_mut < 0.1)
##
## FALSE TRUE
## 31297 1
neoAg_netMHC = netmhc[EL_score_mut_max > 0.9 & EL_score_ref_max < 0.22]
neoAg_ppmint = ppmint[pepprmint_mut > 0.9 & ppmint$pepprmint_ref < 0.5]
dim(neoAg_netMHC)
## [1] 141 5
dim(neoAg_ppmint)
## [1] 148 5
neoAg_netMHC[1:2,]
neoAg_ppmint[1:2,]
t1 = table(neoAg_netMHC$key)
table(t1)
## t1
## 1 2
## 91 25
t2 = table(neoAg_ppmint$key)
table(t2)
## t2
## 1 2
## 106 21
df_both = merge(neoAg_netMHC, neoAg_ppmint, by=c("sample", "key"))
dim(df_both)
## [1] 34 8
df_both[1:2,]
t1 = table(neoAg_netMHC$sample)
t2 = table(neoAg_ppmint$sample)
table(t1)
## t1
## 0 1 2 3 4 5 6 35
## 38 26 12 5 2 3 3 1
table(t2)
## t2
## 0 1 2 3 4 5 7 10 54
## 49 17 8 9 3 1 1 1 1
sort(t1, decreasing = TRUE)[1:10]
##
## Pt54_pre Pt47_on Pt47_pre Pt65_pre Pt7_pre Pt72_pre Pt87_pre Pt44_pre
## 35 6 6 6 5 5 5 4
## Pt94_pre Pt106_pre
## 4 3
sort(t2, decreasing = TRUE)[1:10]
##
## Pt54_pre Pt7_pre Pt106_pre Pt49_pre Pt23_on Pt4_pre Pt65_pre Pt23_pre
## 54 10 7 5 4 4 4 3
## Pt60_on Pt60_pre
## 3 3
t1p = t1[t1 > 0]
t2p = t2[t2 > 0]
length(t1p)
## [1] 52
length(t2p)
## [1] 41
length(intersect(names(t1p), names(t2p)))
## [1] 37
t1 = table(neoAg_netMHC$Patient)
t2 = table(neoAg_ppmint$Patient)
table(t1)
## t1
## 1 2 3 4 5 6 12 35
## 16 6 3 6 3 3 1 1
table(t2)
## t2
## 1 2 3 4 5 6 7 10 54
## 8 7 3 4 1 3 2 1 1
sort(t1, decreasing = TRUE)[1:10]
##
## Pt54 Pt47 Pt65 Pt8 Pt92 Pt7 Pt72 Pt87 Pt13 Pt3
## 35 12 6 6 6 5 5 5 4 4
sort(t2, decreasing = TRUE)[1:10]
##
## Pt54 Pt7 Pt106 Pt23 Pt60 Pt8 Pt86 Pt49 Pt4 Pt47
## 54 10 7 7 6 6 6 5 4 4
length(intersect(names(t1), names(t2)))
## [1] 28
table(clinic$Patient %in% names(t1), clinic$Response)
##
## PDSD PRCR
## FALSE 20 1
## TRUE 27 12
table(clinic$Patient %in% names(t2), clinic$Response)
##
## PDSD PRCR
## FALSE 27 3
## TRUE 20 10
fisher.test(clinic$Patient %in% names(t1), clinic$Response)
##
## Fisher's Exact Test for Count Data
##
## data: clinic$Patient %in% names(t1) and clinic$Response
## p-value = 0.02282
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.107098 397.824196
## sample estimates:
## odds ratio
## 8.644851
fisher.test(clinic$Patient %in% names(t2), clinic$Response)
##
## Fisher's Exact Test for Count Data
##
## data: clinic$Patient %in% names(t2) and clinic$Response
## p-value = 0.05747
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.9634899 28.0179208
## sample estimates:
## odds ratio
## 4.389661
load("../data/riaz_mutdata.RData")
dim(riaz_mutdata)
## [1] 31303 104
names(riaz_mutdata)
## [1] "id"
## [2] "QSS"
## [3] "TQSS"
## [4] "NT"
## [5] "QSS_NT"
## [6] "TQSS_NT"
## [7] "SGT"
## [8] "SOMATIC"
## [9] "seqnames"
## [10] "start"
## [11] "end"
## [12] "REF"
## [13] "ALT"
## [14] "nAltTumor"
## [15] "nRefTumor"
## [16] "nAltNormal"
## [17] "nRefNormal"
## [18] "rdNormalFiltered"
## [19] "rdTumorFiltered"
## [20] "type"
## [21] "vafTumor"
## [22] "vafNormal"
## [23] "contig"
## [24] "position"
## [25] "context"
## [26] "ref_allele"
## [27] "alt_allele"
## [28] "tumor_name"
## [29] "normal_name"
## [30] "score"
## [31] "dbsnp_site"
## [32] "covered"
## [33] "power"
## [34] "tumor_power"
## [35] "normal_power"
## [36] "normal_power_nsp"
## [37] "normal_power_wsp"
## [38] "total_reads"
## [39] "map_Q0_reads"
## [40] "init_t_lod"
## [41] "t_lod_fstar"
## [42] "t_lod_fstar_forward"
## [43] "t_lod_fstar_reverse"
## [44] "tumor_f"
## [45] "contaminant_fraction"
## [46] "contaminant_lod"
## [47] "t_q20_count"
## [48] "t_ref_count"
## [49] "t_alt_count"
## [50] "t_ref_sum"
## [51] "t_alt_sum"
## [52] "t_ref_max_mapq"
## [53] "t_alt_max_mapq"
## [54] "t_ins_count"
## [55] "t_del_count"
## [56] "normal_best_gt"
## [57] "init_n_lod"
## [58] "normal_f"
## [59] "n_q20_count"
## [60] "n_ref_count"
## [61] "n_alt_count"
## [62] "n_ref_sum"
## [63] "n_alt_sum"
## [64] "power_to_detect_positive_strand_artifact"
## [65] "power_to_detect_negative_strand_artifact"
## [66] "strand_bias_counts"
## [67] "tumor_alt_fpir_median"
## [68] "tumor_alt_fpir_mad"
## [69] "tumor_alt_rpir_median"
## [70] "tumor_alt_rpir_mad"
## [71] "observed_in_normals_count"
## [72] "failure_reasons"
## [73] "judgement"
## [74] "Func.ensGene"
## [75] "Gene.ensGene"
## [76] "GeneDetail.ensGene"
## [77] "ExonicFunc.ensGene"
## [78] "AAChange.ensGene"
## [79] "SIFT_score"
## [80] "SIFT_converted_rankscore"
## [81] "SIFT_pred"
## [82] "MutationTaster_pred"
## [83] "MutationAssessor_pred"
## [84] "FATHMM_pred"
## [85] "PROVEAN_pred"
## [86] "MetaSVM_pred"
## [87] "MetaLR_pred"
## [88] "GTEx_V6_gene"
## [89] "GTEx_V6_tissue"
## [90] "ExAC_nontcga_ALL"
## [91] "avsnp150"
## [92] "CLINSIG"
## [93] "CLNDBN"
## [94] "CLNACC"
## [95] "CLNDSDB"
## [96] "CLNDSDBID"
## [97] "nonSynonymous"
## [98] "W"
## [99] "pos"
## [100] "M"
## [101] "Wseq"
## [102] "seqpos"
## [103] "Mseq"
## [104] "EnsembleID"
riaz_mutdata = riaz_mutdata[,c(1, 9:13, 74:75, 77:78)]
riaz_mutdata$key = paste(gsub("chr", "", riaz_mutdata$seqnames),
riaz_mutdata$start, sep=":")
riaz_mutdata$key = paste(riaz_mutdata$key,
riaz_mutdata$REF, sep=":")
riaz_mutdata$key = paste(riaz_mutdata$key,
riaz_mutdata$ALT, sep=":")
riaz_mutdata[1:2,]
table(neoAg_ppmint$key %in% riaz_mutdata$key)
##
## TRUE
## 148
mat1 = match(neoAg_netMHC$key, riaz_mutdata$key)
neoAg_netMHC_with_anno = cbind(neoAg_netMHC, riaz_mutdata[mat1,-(1:6)])
mat2 = match(neoAg_ppmint$key, riaz_mutdata$key)
neoAg_ppmint_with_anno = cbind(neoAg_ppmint, riaz_mutdata[mat2,-(1:6)])
t1 = table(neoAg_netMHC_with_anno$Gene.ensGene)
table(t1)
## t1
## 1 2 3
## 81 24 2
sort(t1, decreasing = TRUE)[1:10]
##
## SEZ6L TTN AGBL1 ATP2B2 C8A CD33 COL12A1 COL6A2 DNAH2 EXOC6
## 3 3 2 2 2 2 2 2 2 2
t2 = table(neoAg_ppmint_with_anno$Gene.ensGene)
table(t2)
## t2
## 1 2
## 96 26
sort(t2, decreasing = TRUE)[1:10]
##
## AGBL1 AP000275.65;C21orf59 CD33
## 2 2 2
## COL12A1 COL6A2 COL6A5
## 2 2 2
## FAM135B FGD2 GLRB
## 2 2 2
## MACC1
## 2
fwrite(neoAg_netMHC_with_anno,
file="step4_nb_analysis_files/neoAg_netMHC_with_anno.tsv", sep = "\t")
fwrite(neoAg_ppmint_with_anno,
file="step4_nb_analysis_files/neoAg_ppmint_with_anno.tsv", sep = "\t")
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] viridis_0.5.1 viridisLite_0.3.0 dplyr_1.0.2
## [4] survminer_0.4.8 ggpubr_0.4.0 survival_3.2-7
## [7] readxl_1.3.1 ggpointdensity_0.1.0 ggplot2_3.3.3
## [10] data.table_1.13.6
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 lattice_0.20-41 tidyr_1.1.2 zoo_1.8-8
## [5] digest_0.6.27 R6_2.5.0 cellranger_1.1.0 backports_1.2.1
## [9] evaluate_0.14 pillar_1.4.7 rlang_0.4.10 curl_4.3
## [13] car_3.0-10 Matrix_1.3-0 rmarkdown_2.6 labeling_0.4.2
## [17] splines_4.0.3 stringr_1.4.0 foreign_0.8-81 munsell_0.5.0
## [21] broom_0.7.3 compiler_4.0.3 xfun_0.19 pkgconfig_2.0.3
## [25] mgcv_1.8-33 htmltools_0.5.0 tidyselect_1.1.0 tibble_3.0.4
## [29] gridExtra_2.3 km.ci_0.5-2 rio_0.5.16 crayon_1.3.4
## [33] withr_2.3.0 grid_4.0.3 nlme_3.1-151 jsonlite_1.7.2
## [37] xtable_1.8-4 gtable_0.3.0 lifecycle_0.2.0 magrittr_2.0.1
## [41] KMsurv_0.1-5 scales_1.1.1 zip_2.1.1 stringi_1.5.3
## [45] carData_3.0-4 farver_2.0.3 ggsignif_0.6.0 ellipsis_0.3.1
## [49] survMisc_0.5.5 generics_0.1.0 vctrs_0.3.6 cowplot_1.1.1
## [53] openxlsx_4.2.3 RColorBrewer_1.1-2 tools_4.0.3 forcats_0.5.0
## [57] glue_1.4.2 purrr_0.3.4 hms_0.5.3 abind_1.4-5
## [61] yaml_2.2.1 colorspace_2.0-0 rstatix_0.6.0 knitr_1.30
## [65] haven_2.3.1